/* Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "udm_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "udm_common.h"
#include "udm_utils.h"
#include "udm_vars.h"
#include "udm_crc32.h"
#include "udm_doc.h"
#include "udm_result.h"
#include "udm_word.h"
#include "udm_searchtool.h"
#include "udm_searchcache.h"
#include "udm_parsehtml.h"

/***************** Results cache functions **************/

/* Compose the search cache file name.                              */
/*                                                                  */
/* The name is built from two CRC32 values: one over all request    */
/* parameters that influence the result set, one over the query     */
/* words themselves ("q").                                          */
/*                                                                  */
/*   dst  - output buffer, always NUL-terminated when len > 0       */
/*   len  - size of dst in bytes                                    */
/*   Conf - environment providing the variable list and vardir      */
/*   Res  - unused, kept for interface compatibility                */

static void cache_file_name(char *dst,size_t len,UDM_ENV *Conf,UDM_RESULT *Res){
	char param[4*1024];
	
	(void)Res;
	if(dst==NULL || len==0)return;
	
	/* Request variables may be arbitrarily long, so the write into  */
	/* the fixed-size buffer has to be bounded. Over-long input is   */
	/* truncated before hashing instead of overflowing the stack.    */
	snprintf(param,sizeof(param),"%s.%s.%s.%d.%s.%s.%s.%s.%s.%s",
		UdmVarListFindStr(&Conf->Vars,"SearchMode","all"),
		UdmVarListFindStr(&Conf->Vars, "m", ""),
		UdmVarListFindStr(&Conf->Vars, "wm", ""),
		UdmVarListFindInt(&Conf->Vars, "o", 0),
		UdmVarListFindStr(&Conf->Vars, "t", ""),
		UdmVarListFindStr(&Conf->Vars, "cat", ""),
		UdmVarListFindStr(&Conf->Vars, "ul", ""),
		UdmVarListFindStr(&Conf->Vars, "wf", ""),
		UdmVarListFindStr(&Conf->Vars, "g", ""),
		UdmVarListFindStr(&Conf->Vars, "tmplt", "")
		);
	param[sizeof(param)-1]='\0';
	
	snprintf(dst,len,"%s%s%s%08X.%08X",
			Conf->vardir,"cache",UDMSLASHSTR,
			UdmStrCRC32(param),
			UdmStrCRC32(UdmVarListFindStr(&Conf->Vars,"q","")));
	dst[len-1]='\0';
}


/* Store a search result in the cache as text, in "<cache name>.xml". */
/*                                                                    */
/* The result is serialized with UdmResultToTextBuf() into a 128 KB   */
/* buffer and written followed by a newline. Caching is best effort:  */
/* every failure is silently skipped and UDM_OK is always returned.   */
/* Progress messages are printed to stderr.                           */

int UdmSearchCacheStore1(UDM_AGENT *A, UDM_RESULT *R){
	char	fname[1024];
	FILE	*f;
	size_t	maxlen=128*1024;
	size_t	namelen;
	char	*textbuf;
	
	fprintf(stderr, "UdmSearchCacheStore: Start\n");
	cache_file_name(fname,sizeof(fname),A->Conf,R);
	
	/* Append the suffix only if it fits: cache_file_name() may have */
	/* filled the buffer completely.                                 */
	namelen=strlen(fname);
	if(namelen+sizeof(".xml")>sizeof(fname))return UDM_OK;
	memcpy(fname+namelen,".xml",sizeof(".xml"));
	
	fprintf(stderr,"write to %s\n",fname);
	if((f=fopen(fname,"w"))){
		textbuf=(char*)malloc(maxlen);
		if(textbuf!=NULL){
			textbuf[0]='\0';
			UdmResultToTextBuf(R,textbuf,maxlen);
			fprintf(f,"%s\n",textbuf);
			free(textbuf);
		}
		fclose(f);
	}
	fprintf(stderr, "UdmSearchCacheStore: Done\n");
	return UDM_OK;
}


/* Load a search result from the text cache file "<cache name>.xml".  */
/*                                                                    */
/* At most 128 KB - 1 bytes are read in a single read() call and      */
/* passed to UdmResultFromTextBuf(). A missing or empty file is not   */
/* an error: R is left untouched and UDM_OK is returned in all cases. */
/* Progress messages are printed to stderr.                           */

int UdmSearchCacheFind1(UDM_AGENT *A, UDM_RESULT *R){
	char		fname[1024];
	int		fd;
	ssize_t		nread;
	size_t		buflen=128*1024;
	size_t		namelen;
	char		*buf;
	
	fprintf(stderr, "UdmSearchCacheFind: Start\n");
	cache_file_name(fname,sizeof(fname),A->Conf,R);
	
	/* Append the suffix only if it fits: cache_file_name() may have */
	/* filled the buffer completely.                                 */
	namelen=strlen(fname);
	if(namelen+sizeof(".xml")>sizeof(fname))return UDM_OK;
	memcpy(fname+namelen,".xml",sizeof(".xml"));
	
	fprintf(stderr,"read to %s\n",fname);
	
	/* open() reports failure with -1; 0 is a valid descriptor */
	fd=open(fname,O_RDONLY);
	if(fd<0)return UDM_OK;
	
	buf=(char*)malloc(buflen);
	if(buf==NULL){
		close(fd);
		return UDM_OK;
	}
	
	nread=read(fd,buf,buflen-1);
	close(fd);
	
	if(nread>0){
		buf[nread]='\0';
		UdmResultFromTextBuf(R,buf);
		fprintf(stderr, "UdmSearchCacheFind: Done\n");
	}
	free(buf);
	return UDM_OK;
}


/* Write exactly len bytes from buf to fd.             */
/* Returns 0 on success, -1 if any write() call fails. */
static int cache_write_all(int fd,const void *buf,size_t len){
	const char	*p=(const char*)buf;
	
	while(len>0){
		ssize_t n=write(fd,p,len);
		if(n<=0)return -1;
		p+=n;
		len-=(size_t)n;
	}
	return 0;
}

/* Store a search result in the binary cache file.                    */
/*                                                                    */
/* The result is cached only when the last hit of the requested page  */
/* ("np", "ps") lies below UDM_FAST_PRESORT_DOCS. File layout, in     */
/* the order written:                                                 */
/*   - the first 4 bytes of Res->total_found                          */
/*   - the raw UDM_WIDEWORDLIST structure                             */
/*   - for every word: the raw UDM_WIDEWORD structure, len bytes of   */
/*     word, len ints of uword                                        */
/*   - the coordinate array, ncoords elements of UDM_URL_CRD          */
/*                                                                    */
/* Caching is best effort, so 0 is always returned. If any write      */
/* fails the incomplete file is removed so that it cannot be read     */
/* back later as a valid cache entry.                                 */

int UdmSearchCacheStore(UDM_AGENT * query,UDM_RESULT *Res){
	int	fd;
	char	fname[1024];
	int	i;
	int	failed;
	int	page_number = UdmVarListFindInt(&query->Conf->Vars,"np",0);
	int	page_size   = UdmVarListFindInt(&query->Conf->Vars,"ps",20);
	size_t	topcount;
	
	/* Index of the last hit on the requested page, limited to the    */
	/* number of hits found. With no hits at all the subtraction      */
	/* wraps around and the test below skips caching.                 */
	topcount=page_size*(page_number+1)-1;
	if(topcount>=Res->total_found)topcount=Res->total_found-1;
	
	if(topcount>=UDM_FAST_PRESORT_DOCS)return(0);
	
	UdmSortSearchWordsByWeight(Res->CoordList.Coords,Res->CoordList.ncoords);
	cache_file_name(fname,sizeof(fname),query->Conf,Res);
	
#ifdef DEBUG_CACHE
	fprintf(stderr,"write to %s\n",fname);
#endif
	fd=open(fname,O_WRONLY|O_CREAT|O_TRUNC|UDM_BINARY,UDM_IWRITE);
	if(fd<0){
#ifdef DEBUG_CACHE
		fprintf(stderr,"%s\n",strerror(errno));
#endif
		return(0);
	}
#ifdef DEBUG_CACHE
	fprintf(stderr, "found:%d\n", Res->total_found);	
#endif
	
	failed = cache_write_all(fd,&Res->total_found,4)
	      || cache_write_all(fd,&(Res->WWList),sizeof(UDM_WIDEWORDLIST));
	
	for(i=0; !failed && i<Res->WWList.nwords; i++){
		failed = cache_write_all(fd,&(Res->WWList.Word[i]),sizeof(UDM_WIDEWORD))
		      || cache_write_all(fd,Res->WWList.Word[i].word,Res->WWList.Word[i].len)
		      || cache_write_all(fd,Res->WWList.Word[i].uword,sizeof(int)*Res->WWList.Word[i].len);
	}
	
	if(!failed){
		failed = cache_write_all(fd,Res->CoordList.Coords,
					Res->CoordList.ncoords*sizeof(UDM_URL_CRD));
	}
	
	if(close(fd)!=0)failed=1;
	
	/* Never leave a truncated cache entry behind */
	if(failed)remove(fname);
	
	return(0);
}

/* Read exactly len bytes from fd into buf.                    */
/* Returns 0 on success, -1 on a read error or premature EOF.  */
static int cache_read_exact(int fd,void *buf,size_t len){
	char	*p=(char*)buf;
	
	while(len>0){
		ssize_t n=read(fd,p,len);
		if(n<=0)return -1;
		p+=n;
		len-=(size_t)n;
	}
	return 0;
}

/* Load a search result from the binary cache file.                   */
/*                                                                    */
/* Reads the data in the order UdmSearchCacheStore() writes it: hit   */
/* count, word list header, the words, then the coordinate array.     */
/* On success Res->CoordList takes ownership of the coordinates,      */
/* Res->first / last / num_rows describe the requested page ("np",    */
/* "ps"; first and last are 1-based) and Res->Doc holds one document  */
/* per row with its "ID" and "Score" sections set.                    */
/*                                                                    */
/* Returns 0 on success, -1 if the file is missing, unreadable,       */
/* truncated in its header part, or memory runs out. On failure Res   */
/* may already be partly modified (offset, total_found, WWList).      */

int UdmSearchCacheFind(UDM_AGENT * Agent,UDM_RESULT *Res){
	char fname[1024];
	int fd;
	UDM_URL_CRD *wrd=NULL;
	char *word=NULL;
	int *uword=NULL;
	ssize_t nbytes;
	UDM_WIDEWORDLIST wwl;
	UDM_WIDEWORD ww;
	int i;
	size_t start;
	int page_size   = UdmVarListFindInt(&Agent->Conf->Vars,"ps",20);
	int page_number = UdmVarListFindInt(&Agent->Conf->Vars,"np",0);
	
#ifdef DEBUG_CACHE
	fprintf(stderr, "UdmSearchCacheFind: Start\n");
#endif
	
	/* Both values come from the request; negative ones would */
	/* produce negative array indexes further down.           */
	if(page_size<0)page_size=0;
	if(page_number<0)page_number=0;
	
	Res->offset = 1;
	
	cache_file_name(fname,sizeof(fname),Agent->Conf,Res);
	if((fd=open(fname,O_RDONLY|UDM_BINARY))<0)return(-1);
	
	/* Only 4 bytes are stored in the file. Clear the field first so */
	/* that no stale bytes survive if it is wider than that.         */
	Res->total_found = 0;
	if(cache_read_exact(fd,&Res->total_found,4))goto fail;
	
#ifdef DEBUG_CACHE
	fprintf(stderr, " found: %d\n", Res->total_found);
#endif
	/* Only nwords and nuniq of the stored header are used; pointers */
	/* inside it belong to the process that wrote the file.          */
	if(cache_read_exact(fd,&wwl,sizeof(UDM_WIDEWORDLIST)))goto fail;
#ifdef DEBUG_CACHE
	fprintf(stderr, " nwords: %d\n", wwl.nwords);
#endif
	for(i = 0; i < wwl.nwords; i++) {
		if(cache_read_exact(fd,&ww,sizeof(UDM_WIDEWORD)))goto fail;
		
		/* One extra zeroed element terminates each string */
		word=(char*)calloc(ww.len+1,sizeof(char));
		uword=(int*)calloc(ww.len+1,sizeof(int));
		if(word==NULL || uword==NULL)goto fail;
		
		if(cache_read_exact(fd,word,ww.len))goto fail;
		if(cache_read_exact(fd,uword,sizeof(int)*ww.len))goto fail;
		
		/* Replace the stale pointers read from the file */
		ww.word=word;
		ww.uword=uword;
		UdmWideWordListAdd(&Res->WWList, &ww, 1);
		
		free(word);
		word=NULL;
		free(uword);
		uword=NULL;
	}
	Res->WWList.nuniq = wwl.nuniq;
	
	/* UdmSearchCacheStore() does not cache empty results, so a zero */
	/* count means the file is not usable.                           */
	if(Res->total_found==0)goto fail;
	
	wrd=(UDM_URL_CRD*)calloc(Res->total_found,sizeof(*wrd));
	if(wrd==NULL)goto fail;
	
	/* The file holds ncoords entries, which may be fewer than       */
	/* total_found: a short read is fine here, the rest stays zero.  */
	nbytes=read(fd, wrd, Res->total_found * sizeof(*wrd));
	if(nbytes<0)goto fail;
	close(fd);
	
	Res->CoordList.ncoords=nbytes/sizeof(*wrd);
	UDM_FREE(Res->CoordList.Coords);
	Res->CoordList.Coords = wrd;
	
	Res->first = page_number * page_size;	
	if (Res->first >= Res->total_found) Res->first = Res->total_found-1;
	
	/* If results more than 1 page */
	/* we must cut the tail        */
	if((Res->first + page_size) > Res->total_found){
		Res->num_rows = Res->total_found-Res->first;
	}else{
		Res->num_rows = page_size;
	}
	Res->last = Res->first + Res->num_rows - 1;
	
	/* Remember the zero-based page start for indexing Coords[] */
	start = Res->first;
	
	/* first and last begins from 0, make it begin from 1 */
	Res->first++;
	Res->last++;
	
	/* Allocate an array for documents information */
	Res->Doc=(UDM_DOCUMENT*)malloc(sizeof(UDM_DOCUMENT)*(Res->num_rows));
	if(Res->Doc==NULL && Res->num_rows!=0)return(-1);
	
	/* Copy url_id and coord to result */
	for(i = 0; i < Res->num_rows; i++){
		UdmDocInit(&Res->Doc[i]);
		UdmVarListReplaceInt(&Res->Doc[i].Sections,"ID", Res->CoordList.Coords[start + i].url_id);
		UdmVarListReplaceInt(&Res->Doc[i].Sections,"Score",(int)Res->CoordList.Coords[start + i].coord);
	}
#ifdef DEBUG_CACHE
	fprintf(stderr, "UdmSearchCacheFind: Done\n");
#endif

	return(0);

fail:
	free(word);
	free(uword);
	free(wrd);
	close(fd);
	return(-1);
}
